/*
 * Copyright 2009 Nicolai Haehnle.
 * Copyright 2011 Tom Stellard <tstellar@gmail.com>
 * SPDX-License-Identifier: MIT
 */

#include "radeon_regalloc.h"
#include "radeon_list.h"

#define VERBOSE 0

#define DBG(...)                                                                                   \
   do {                                                                                            \
      if (VERBOSE)                                                                                 \
         fprintf(stderr, __VA_ARGS__);                                                             \
   } while (0)

const struct rc_class rc_class_list_vp[] = {
   {
      RC_REG_CLASS_VP_SINGLE,
      4,
      {RC_MASK_X,
       RC_MASK_Y,
       RC_MASK_Z,
       RC_MASK_W,
       RC_MASK_NONE,
       RC_MASK_NONE},
   },
   {
      RC_REG_CLASS_VP_DOUBLE,
      6,
      {RC_MASK_X | RC_MASK_Y,
       RC_MASK_X | RC_MASK_Z,
       RC_MASK_X | RC_MASK_W,
       RC_MASK_Y | RC_MASK_Z,
       RC_MASK_Y | RC_MASK_W,
       RC_MASK_Z | RC_MASK_W},
   },
   {
      RC_REG_CLASS_VP_TRIPLE,
      4,
      {RC_MASK_X | RC_MASK_Y | RC_MASK_Z,
       RC_MASK_X | RC_MASK_Y | RC_MASK_W,
       RC_MASK_X | RC_MASK_Z | RC_MASK_W,
       RC_MASK_Y | RC_MASK_Z | RC_MASK_W,
       RC_MASK_NONE,
       RC_MASK_NONE},
   },
   {
      RC_REG_CLASS_VP_QUADRUPLE,
      1,
      {RC_MASK_X | RC_MASK_Y | RC_MASK_Z | RC_MASK_W,
       RC_MASK_NONE,
       RC_MASK_NONE,
       RC_MASK_NONE,
       RC_MASK_NONE,
       RC_MASK_NONE},
   }};

const struct rc_class rc_class_list_fp[] = {
   {
      RC_REG_CLASS_FP_SINGLE,
      3,
      {RC_MASK_X,
       RC_MASK_Y,
       RC_MASK_Z,
       RC_MASK_NONE,
       RC_MASK_NONE,
       RC_MASK_NONE},
   },
   {
      RC_REG_CLASS_FP_DOUBLE,
      3,
      {RC_MASK_X | RC_MASK_Y,
       RC_MASK_X | RC_MASK_Z,
       RC_MASK_Y | RC_MASK_Z,
       RC_MASK_NONE,
       RC_MASK_NONE,
       RC_MASK_NONE},
   },
   {
      RC_REG_CLASS_FP_TRIPLE,
      1,
      {RC_MASK_X | RC_MASK_Y | RC_MASK_Z,
       RC_MASK_NONE,
       RC_MASK_NONE,
       RC_MASK_NONE,
       RC_MASK_NONE,
       RC_MASK_NONE},
   },
   {
      RC_REG_CLASS_FP_ALPHA,
      1,
      {RC_MASK_W,
       RC_MASK_NONE,
       RC_MASK_NONE,
       RC_MASK_NONE,
       RC_MASK_NONE,
       RC_MASK_NONE},
   },
   {
      RC_REG_CLASS_FP_SINGLE_PLUS_ALPHA,
      3,
      {RC_MASK_X | RC_MASK_W,
       RC_MASK_Y | RC_MASK_W,
       RC_MASK_Z | RC_MASK_W,
       RC_MASK_NONE,
       RC_MASK_NONE,
       RC_MASK_NONE},
   },
   {
      RC_REG_CLASS_FP_DOUBLE_PLUS_ALPHA,
      3,
      {RC_MASK_X | RC_MASK_Y | RC_MASK_W,
       RC_MASK_X | RC_MASK_Z | RC_MASK_W,
       RC_MASK_Y | RC_MASK_Z | RC_MASK_W,
       RC_MASK_NONE,
       RC_MASK_NONE,
       RC_MASK_NONE},
   },
   {
      RC_REG_CLASS_FP_TRIPLE_PLUS_ALPHA,
      1,
      {RC_MASK_X | RC_MASK_Y | RC_MASK_Z | RC_MASK_W,
       RC_MASK_NONE,
       RC_MASK_NONE,
       RC_MASK_NONE,
       RC_MASK_NONE,
       RC_MASK_NONE},
   },
   {
      RC_REG_CLASS_FP_X,
      1,
      {RC_MASK_X,
       RC_MASK_NONE,
       RC_MASK_NONE,
       RC_MASK_NONE,
       RC_MASK_NONE,
       RC_MASK_NONE},
   },
   {
      RC_REG_CLASS_FP_Y,
      1,
      {RC_MASK_Y,
       RC_MASK_NONE,
       RC_MASK_NONE,
       RC_MASK_NONE,
       RC_MASK_NONE,
       RC_MASK_NONE},
   },
   {
      RC_REG_CLASS_FP_Z,
      1,
      {RC_MASK_Z,
       RC_MASK_NONE,
       RC_MASK_NONE,
       RC_MASK_NONE,
       RC_MASK_NONE,
       RC_MASK_NONE},
   },
   {
      RC_REG_CLASS_FP_XY,
      1,
      {RC_MASK_X | RC_MASK_Y,
       RC_MASK_NONE,
       RC_MASK_NONE,
       RC_MASK_NONE,
       RC_MASK_NONE,
       RC_MASK_NONE},
   },
   {
      RC_REG_CLASS_FP_YZ,
      1,
      {RC_MASK_Y | RC_MASK_Z,
       RC_MASK_NONE,
       RC_MASK_NONE,
       RC_MASK_NONE,
       RC_MASK_NONE,
       RC_MASK_NONE},
   },
   {
      RC_REG_CLASS_FP_XZ,
      1,
      {RC_MASK_X | RC_MASK_Z,
       RC_MASK_NONE,
       RC_MASK_NONE,
       RC_MASK_NONE,
       RC_MASK_NONE,
       RC_MASK_NONE},
   },
   {
      RC_REG_CLASS_FP_XW,
      1,
      {RC_MASK_X | RC_MASK_W,
       RC_MASK_NONE,
       RC_MASK_NONE,
       RC_MASK_NONE,
       RC_MASK_NONE,
       RC_MASK_NONE},
   },
   {
      RC_REG_CLASS_FP_YW,
      1,
      {RC_MASK_Y | RC_MASK_W,
       RC_MASK_NONE,
       RC_MASK_NONE,
       RC_MASK_NONE,
       RC_MASK_NONE,
       RC_MASK_NONE},
   },
   {
      RC_REG_CLASS_FP_ZW,
      1,
      {RC_MASK_Z | RC_MASK_W,
       RC_MASK_NONE,
       RC_MASK_NONE,
       RC_MASK_NONE,
       RC_MASK_NONE,
       RC_MASK_NONE},
   },
   {
      RC_REG_CLASS_FP_XYW,
      1,
      {RC_MASK_X | RC_MASK_Y | RC_MASK_W,
       RC_MASK_NONE,
       RC_MASK_NONE,
       RC_MASK_NONE,
       RC_MASK_NONE,
       RC_MASK_NONE},
   },
   {
      RC_REG_CLASS_FP_YZW,
      1,
      {RC_MASK_Y | RC_MASK_Z | RC_MASK_W,
       RC_MASK_NONE,
       RC_MASK_NONE,
       RC_MASK_NONE,
       RC_MASK_NONE,
       RC_MASK_NONE},
   },
   {
      RC_REG_CLASS_FP_XZW,
      1,
      {RC_MASK_X | RC_MASK_Z | RC_MASK_W,
       RC_MASK_NONE,
       RC_MASK_NONE,
       RC_MASK_NONE,
       RC_MASK_NONE,
       RC_MASK_NONE},
   }};

static void
print_live_intervals(struct live_intervals *src)
{
   if (!src || !src->Used) {
      DBG("(null)");
      return;
   }

   DBG("(%i,%i)", src->Start, src->End);
}

static int
overlap_live_intervals(struct live_intervals *a, struct live_intervals *b)
{
   if (VERBOSE) {
      DBG("overlap_live_intervals: ");
      print_live_intervals(a);
      DBG(" to ");
      print_live_intervals(b);
      DBG("\n");
   }

   if (!a->Used || !b->Used) {
      DBG("    unused interval\n");
      return 0;
   }

   if (a->Start > b->Start) {
      if (a->Start < b->End) {
         DBG("    overlap\n");
         return 1;
      }
   } else if (b->Start > a->Start) {
      if (b->Start < a->End) {
         DBG("    overlap\n");
         return 1;
      }
   } else { /* a->Start == b->Start */
      if (a->Start != a->End && b->Start != b->End) {
         DBG("    overlap\n");
         return 1;
      }
   }

   DBG("    no overlap\n");

   return 0;
}

int
rc_find_class(const struct rc_class *classes, unsigned int writemask,
              unsigned int max_writemask_count)
{
   unsigned int i;
   for (i = 0; i < RC_REG_CLASS_FP_COUNT; i++) {
      unsigned int j;
      if (classes[i].WritemaskCount > max_writemask_count) {
         continue;
      }
      for (j = 0; j < classes[i].WritemaskCount; j++) {
         if (classes[i].Writemasks[j] == writemask) {
            return i;
         }
      }
   }
   return -1;
}

unsigned int
rc_overlap_live_intervals_array(struct live_intervals *a, struct live_intervals *b)
{
   unsigned int a_chan, b_chan;
   for (a_chan = 0; a_chan < 4; a_chan++) {
      for (b_chan = 0; b_chan < 4; b_chan++) {
         if (overlap_live_intervals(&a[a_chan], &b[b_chan])) {
            return 1;
         }
      }
   }
   return 0;
}

#if VERBOSE
static void
print_reg(int reg)
{
   unsigned int index = reg_get_index(reg);
   unsigned int mask = reg_get_writemask(reg);
   fprintf(stderr, "Temp[%u].%c%c%c%c", index, mask & RC_MASK_X ? 'x' : '_',
           mask & RC_MASK_Y ? 'y' : '_', mask & RC_MASK_Z ? 'z' : '_',
           mask & RC_MASK_W ? 'w' : '_');
}
#endif

static void
add_register_conflicts(struct ra_regs *regs, unsigned int max_temp_regs)
{
   unsigned int index, a_mask, b_mask;
   for (index = 0; index < max_temp_regs; index++) {
      for (a_mask = 1; a_mask <= RC_MASK_XYZW; a_mask++) {
         for (b_mask = a_mask + 1; b_mask <= RC_MASK_XYZW; b_mask++) {
            if (a_mask & b_mask) {
               ra_add_reg_conflict(regs, get_reg_id(index, a_mask), get_reg_id(index, b_mask));
            }
         }
      }
   }
}

void
rc_build_interference_graph(struct ra_graph *graph, struct rc_list *variables)
{
   unsigned node_index;
   struct rc_list *var_ptr;

   /* Build the interference graph */
   for (var_ptr = variables, node_index = 0; var_ptr; var_ptr = var_ptr->Next, node_index++) {
      struct rc_list *a, *b;
      unsigned int b_index;

      for (a = var_ptr, b = var_ptr->Next, b_index = node_index + 1; b; b = b->Next, b_index++) {
         struct rc_variable *var_a = a->Item;
         while (var_a) {
            struct rc_variable *var_b = b->Item;
            while (var_b) {
               if (rc_overlap_live_intervals_array(var_a->Live, var_b->Live)) {
                  ra_add_node_interference(graph, node_index, b_index);
               }
               var_b = var_b->Friend;
            }
            var_a = var_a->Friend;
         }
      }
   }
}

void
rc_init_regalloc_state(struct rc_regalloc_state *s, enum rc_program_type prog)
{
   unsigned i, j, index, class_count, max_temps;
   unsigned **ra_q_values;

   /* Pre-computed q values.  This array describes the maximum number of
    * a class's [row] registers that are in conflict with a single
    * register from another class [column].
    *
    * For example:
    * q_values[0][2] is 3, because a register from class 2
    * (RC_REG_CLASS_FP_TRIPLE) may conflict with at most 3 registers from
    * class 0 (RC_REG_CLASS_FP_SINGLE) e.g. T0.xyz conflicts with T0.x, T0.y,
    * and T0.z.
    *
    * q_values[2][0] is 1, because a register from class 0
    * (RC_REG_CLASS_FP_SINGLE) may conflict with at most 1 register from
    * class 2 (RC_REG_CLASS_FP_TRIPLE) e.g. T0.x conflicts with T0.xyz
    *
    * The q values for each register class [row] will never be greater
    * than the maximum number of writemask combinations for that class.
    *
    * For example:
    *
    * Class 2 (RC_REG_CLASS_FP_TRIPLE) only has 1 writemask combination,
    * so no value in q_values[2][0..RC_REG_CLASS_FP_COUNT] will be greater
    * than 1.
    */
   const unsigned q_values_fp[RC_REG_CLASS_FP_COUNT][RC_REG_CLASS_FP_COUNT] = {
      {1, 2, 3, 0, 1, 2, 3, 1, 1, 1, 2, 2, 2, 1, 1, 1, 2, 2, 2},
      {2, 3, 3, 0, 2, 3, 3, 2, 2, 2, 3, 3, 3, 2, 2, 2, 3, 3, 3},
      {1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1},
      {0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1},
      {1, 2, 3, 3, 3, 3, 3, 1, 1, 1, 2, 2, 2, 3, 3, 3, 3, 3, 3},
      {2, 3, 3, 3, 3, 3, 3, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3},
      {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1},
      {1, 1, 1, 0, 1, 1, 1, 1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 0, 1},
      {1, 1, 1, 0, 1, 1, 1, 0, 1, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0},
      {1, 1, 1, 0, 1, 1, 1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 0, 1, 1},
      {1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1},
      {1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1},
      {1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1},
      {1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1},
      {1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1},
      {1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1},
      {1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1},
      {1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1},
      {1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1}};

   const unsigned q_values_vp[RC_REG_CLASS_VP_COUNT][RC_REG_CLASS_VP_COUNT] = {{1, 2, 3, 4},
                                                                               {3, 5, 6, 6},
                                                                               {3, 4, 4, 4},
                                                                               {1, 1, 1, 1}};

   if (prog == RC_FRAGMENT_PROGRAM) {
      s->class_list = rc_class_list_fp;
      class_count = RC_REG_CLASS_FP_COUNT;
      max_temps = R500_PFS_NUM_TEMP_REGS;
   } else {
      s->class_list = rc_class_list_vp;
      class_count = RC_REG_CLASS_VP_COUNT;
      max_temps = R300_VS_MAX_TEMPS;
   }

   /* Allocate the main ra data structure */
   s->regs = ra_alloc_reg_set(NULL, max_temps * RC_MASK_XYZW, true);

   /* Create the register classes */
   for (i = 0; i < class_count; i++) {
      const struct rc_class *class = &s->class_list[i];
      s->classes[class->ID] = ra_alloc_reg_class(s->regs);

      /* Assign registers to the classes */
      for (index = 0; index < max_temps; index++) {
         for (j = 0; j < class->WritemaskCount; j++) {
            int reg_id = get_reg_id(index, class->Writemasks[j]);
            ra_class_add_reg(s->classes[class->ID], reg_id);
         }
      }
   }

   /* Set the q values.  The q_values array is indexed based on
    * the rc_reg_class ID (RC_REG_CLASS_FP_*) which might be
    * different than the ID assigned to that class by ra.
    * This why we need to manually construct this list.
    */
   ra_q_values = MALLOC(class_count * sizeof(unsigned *));

   for (i = 0; i < class_count; i++) {
      ra_q_values[i] = MALLOC(class_count * sizeof(unsigned));
      for (j = 0; j < class_count; j++) {
         if (prog == RC_FRAGMENT_PROGRAM)
            ra_q_values[i][j] = q_values_fp[i][j];
         else
            ra_q_values[i][j] = q_values_vp[i][j];
      }
   }

   /* Add register conflicts */
   add_register_conflicts(s->regs, max_temps);

   ra_set_finalize(s->regs, ra_q_values);

   for (i = 0; i < class_count; i++) {
      FREE(ra_q_values[i]);
   }
   FREE(ra_q_values);
}

void
rc_destroy_regalloc_state(struct rc_regalloc_state *s)
{
   ralloc_free(s->regs);
}
